<?php

namespace app\demo\controller;

use think\App;

class Crawler extends Base
{

    /**
     * Web crawler demo.
     *
     * Downloads a remote script that delivers the page markup through a
     * document.write('...') call, decodes that JavaScript string literal and
     * echoes the inner HTML of the first <div class="w-detail"> element.
     *
     * The request is terminated with die() and a short message when the
     * script contains no document.write() call, when its payload cannot be
     * decoded, or when the article container is not found.
     */
    public function crawler() {
        $url = "https://nwzimg.wezhan.cn/pubsf/10273/10273476/cdn-static-pages/newsinfo/pc/7062771_zh-cn.html.Body.js?version=20241009162100";
        $html_raw = curl_get_data($url);
        // curl_get_data() is defined outside this file; a non-string result is
        // handled like a response without any document.write() call.
        if (!is_string($html_raw)) {
            die("没有找到匹配的内容1");
        }

        // Capture the single-quoted JavaScript literal. Backslash escapes
        // (including \') are consumed as pairs so an escaped quote does not end
        // the capture early; possessive quantifiers keep the match linear on
        // large payloads.
        $literal_pattern = "/document\\.write\\('([^'\\\\]*+(?:\\\\.[^'\\\\]*+)*+)'\\);/s";
        if (!preg_match($literal_pattern, $html_raw, $matches)) {
            die("没有找到匹配的内容1");
        }

        $html = $this->decodeJsString($matches[1]);
        if ($html === null) {
            die("内容解码失败");
        }

        $article_content = $this->extractArticleContent($html);
        if ($article_content === null) {
            die("未匹配");
        }
        echo($article_content);
    }

    /**
     * Runs the article extraction against the local file "tmp.html" and
     * terminates the request with the extracted markup (or with a failure
     * message when the file is unreadable or the container is missing).
     */
    public function test2() {
        $html = file_get_contents("tmp.html");
        // An unreadable file cannot contain the article container.
        if ($html === false) {
            die("未匹配");
        }

        $article_content = $this->extractArticleContent($html);
        if ($article_content === null) {
            die("未匹配");
        }
        die($article_content);
    }

    /**
     * Decodes the body of a single-quoted JavaScript string literal.
     *
     * The body is normalised into a JSON string and handed to json_decode():
     * the JavaScript-only escape \' becomes a plain quote, and raw double
     * quotes are escaped so they cannot terminate the JSON string. All other
     * escape sequences are passed through unchanged.
     *
     * @param string $literal Literal body without the surrounding quotes.
     * @return string|null Decoded text, or null when decoding fails.
     */
    private function decodeJsString($literal) {
        $json_body = preg_replace_callback(
            '/\\\\(.)|"/s',
            function ($m) {
                if ($m[0] === '"') {
                    return '\\"';
                }
                return $m[1] === "'" ? "'" : $m[0];
            },
            $literal
        );
        if ($json_body === null) {
            return null;
        }

        $decoded = json_decode('"' . $json_body . '"');
        return is_string($decoded) ? $decoded : null;
    }

    /**
     * Returns the inner HTML of the first <div class="w-detail"> element.
     *
     * Line breaks are stripped first because "." in the pattern does not match
     * newlines. The lazy match ends at the first closing </div>, so markup
     * that nests further <div> elements inside the container is cut short.
     *
     * @param string $html Page markup to search.
     * @return string|null Captured markup, or null when nothing matches.
     */
    private function extractArticleContent($html) {
        $html = str_replace(["\r","\n"],"",$html);
        if (preg_match('/<div class="w-detail">(.*?)<\/div>/', $html, $found)) {
            return $found[1];
        }
        return null;
    }

}
